package org.example.analyse;

import cn.hutool.poi.excel.ExcelReader;
import cn.hutool.poi.excel.ExcelUtil;
import com.alibaba.fastjson.JSON;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.common.Term;

import java.util.*;

/**
 * Word-frequency report over the {@code disLikeReasonTranslate} column of an Excel workbook.
 *
 * <p>Every row of the workbook is read, the column's text is segmented with HanLP, and the most
 * frequent words are printed to standard output in descending order of occurrence.
 *
 * @author wyatt
 * @since 2024/09/29
 */
public class G2Analyse {

    /** Workbook analysed when no path is supplied on the command line. */
    private static final String DEFAULT_WORKBOOK = "D:\\Desktop\\shopify\\g2_20240926_翻译.xlsx";

    /** Column whose text is segmented. */
    private static final String CONTENT_COLUMN = "disLikeReasonTranslate";

    /**
     * Part-of-speech tags whose terms are left out of the tally.
     * NOTE(review): "w" is presumably HanLP's punctuation tag; confirm against the tag set in use.
     */
    private static final Set<String> EXCLUDED_NATURES =
            Collections.unmodifiableSet(new HashSet<>(Arrays.asList("w", "")));

    /** Upper bound on the number of rows examined. */
    private static final int MAX_ROWS = 10000;

    /** Upper bound on the number of entries printed per report. */
    private static final int MAX_PRINTED = 1000;

    /**
     * Whether to additionally tally and print part-of-speech tags. Kept off so the output matches
     * the previous behaviour, where the tag tally was disabled and its report was always empty.
     */
    private static final boolean COUNT_NATURES = false;

    /**
     * Runs the analysis and prints the reports.
     *
     * @param args optional; {@code args[0]} is the path of the workbook to analyse. When absent,
     *             {@link #DEFAULT_WORKBOOK} is used.
     */
    public static void main(String[] args) {
        String workbookPath = (args != null && args.length > 0) ? args[0] : DEFAULT_WORKBOOK;

        List<Map<String, Object>> rows;
        // Close the reader as soon as the rows are loaded so the workbook handle is not leaked.
        try (ExcelReader reader = ExcelUtil.getReader(workbookPath)) {
            rows = reader.readAll();
        }

        Map<String, Integer> wordCounts = new HashMap<>();
        Map<String, Integer> natureCounts = new HashMap<>();
        tally(rows, wordCounts, natureCounts);

        printTop("countList", wordCounts, MAX_PRINTED);
        if (COUNT_NATURES) {
            printTop("natureList", natureCounts, MAX_PRINTED);
        }
    }

    /**
     * Segments the content column of at most {@link #MAX_ROWS} rows and accumulates term counts.
     *
     * @param rows         rows as returned by the Excel reader, keyed by column header
     * @param wordCounts   receives the number of occurrences of each word
     * @param natureCounts receives the number of occurrences of each part-of-speech tag; only
     *                     filled when {@link #COUNT_NATURES} is enabled
     */
    private static void tally(List<Map<String, Object>> rows,
                              Map<String, Integer> wordCounts,
                              Map<String, Integer> natureCounts) {
        int examined = 0;
        for (Map<String, Object> row : rows) {
            if (examined >= MAX_ROWS) {
                break;
            }
            examined++;

            Object cell = row.get(CONTENT_COLUMN);
            if (cell == null) {
                // Missing column or empty cell: nothing to segment.
                continue;
            }

            for (Term term : HanLP.segment(cell.toString())) {
                // Read the fields directly instead of splitting term.toString() on "/", which
                // breaks when the word itself contains "/" or when nature display is turned off.
                String nature = String.valueOf(term.nature);
                if (EXCLUDED_NATURES.contains(nature)) {
                    continue;
                }

                wordCounts.merge(term.word, 1, Integer::sum);
                if (COUNT_NATURES) {
                    natureCounts.merge(nature, 1, Integer::sum);
                }
            }
        }
    }

    /**
     * Prints the entries of {@code counts} with the highest values, in descending order of value.
     *
     * @param label  prefix written at the start of every printed line
     * @param counts tally to report
     * @param limit  maximum number of entries to print
     */
    private static void printTop(String label, Map<String, Integer> counts, int limit) {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        entries.sort(Map.Entry.comparingByValue(Comparator.reverseOrder()));

        int printed = 0;
        for (Map.Entry<String, Integer> entry : entries) {
            if (printed >= limit) {
                break;
            }
            System.out.println(label + " Key: " + entry.getKey() + ", Value: " + entry.getValue());
            printed++;
        }
    }
}
